{ "cells": [ { "cell_type": "markdown", "id": "ba522cca-d443-41f9-9026-efceb42a5aad", "metadata": {}, "source": [ "# Choropleth example\n", "This is an example of creating choropleth maps using [Altair](https://altair-viz.github.io/index.html)." ] }, { "cell_type": "code", "execution_count": 1, "id": "6f4f0d94-200c-47b5-90fd-3ccc3314505a", "metadata": {}, "outputs": [], "source": [ "import csv\n", "import json\n", "from pathlib import Path\n", "\n", "import altair as alt\n", "from vega_datasets import data as vega_data\n", "\n", "from pyinaturalist import iNatClient\n", "\n", "# Change to any species or state\n", "TAXON_NAME = 'Buteo jamaicensis' # Red-tailed hawk\n", "STATE_ABBREV = 'IA'\n", "\n", "taxon_slug = TAXON_NAME.replace(' ', '_').lower()\n", "SAMPLE_DATA_DIR = Path('.').parent / 'sample_data'\n", "COUNTY_CACHE_FILE = Path(f'{taxon_slug}_{STATE_ABBREV}_counties.json')\n", "COUNTY_ID_LOOKUP = SAMPLE_DATA_DIR / 'us_county_place_ids.csv'\n", "\n", "STATE_CACHE_FILE = Path(f'{taxon_slug}_us_states.json')\n", "STATE_ID_LOOKUP = SAMPLE_DATA_DIR / 'us_state_place_ids.csv'\n", "STATE_POPULATION_LOOKUP = SAMPLE_DATA_DIR / 'us_state_populations.csv'\n", "EXCLUDED_STATE_FIPS = {2, 15} # Exclude AK and HI for contiguous US map\n", "\n", "client = iNatClient()" ] }, { "cell_type": "markdown", "id": "22857238-653b-4675-9bfa-c9049c7495bc", "metadata": {}, "source": [ "## County-level map\n", "We'll start by visualizing observations per county within a given state.\n", "\n", "Sample data is included that maps US county FIPS codes to iNaturalist place IDs. We'll start by loading that and then fetching observation counts per county:" ] }, { "cell_type": "code", "execution_count": 2, "id": "0afad5c3-549e-4a49-b9eb-3f20ac45d2e8", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading county counts from cache: buteo_jamaicensis_IA_counties.json\n" ] } ], "source": [ "def load_county_lookup(state_abbrev: str) -> dict[str, int]:\n", " \"\"\"Return {5-digit-FIPS-string: inat_place_id} for counties in the given state.\"\"\"\n", " with open(COUNTY_ID_LOOKUP, newline='') as f:\n", " return {\n", " row['fips_code'].zfill(5): int(row['inat_place_id'])\n", " for row in csv.DictReader(f)\n", " if row['state_abbr'] == state_abbrev\n", " }\n", "\n", "\n", "def fetch_county_counts(fips_to_inat: dict[str, int]) -> dict[str, int]:\n", " \"\"\"Fetch observation counts per county, returning {fips: count}.\"\"\"\n", " counts: dict[str, int] = {}\n", " total = len(fips_to_inat)\n", " for i, (fips, place_id) in enumerate(fips_to_inat.items(), start=1):\n", " print(f' County {i}/{total} (FIPS {fips}, place_id {place_id})')\n", " count = client.observations.search(\n", " taxon_name=TAXON_NAME,\n", " place_id=place_id,\n", " verifiable=True,\n", " ).count()\n", " counts[fips] = count\n", " return counts\n", "\n", "\n", "def load_or_fetch_county_counts(state_abbrev: str) -> dict[str, int]:\n", " \"\"\"Load county counts from cache if available, otherwise fetch from API.\"\"\"\n", " if COUNTY_CACHE_FILE.exists():\n", " print(f'Loading county counts from cache: {COUNTY_CACHE_FILE}')\n", " with open(COUNTY_CACHE_FILE) as f:\n", " return json.load(f)\n", "\n", " print(f'Fetching county observation counts for {state_abbrev}...')\n", " fips_to_inat = load_county_lookup(state_abbrev)\n", " counts = fetch_county_counts(fips_to_inat)\n", "\n", " with open(COUNTY_CACHE_FILE, 'w') as f:\n", " json.dump(counts, f)\n", " print(f'Cached to {COUNTY_CACHE_FILE}')\n", " return counts\n", "\n", "\n", "counts = load_or_fetch_county_counts(STATE_ABBREV)" ] }, { "cell_type": "markdown", "id": "9ae458e2-3107-4a3d-b553-a73283e0e80f", "metadata": {}, "source": [ "Then we can map these counts over the [vega-data US 1m](https://github.com/vega/vega/blob/main/docs/data/us-10m.json) dataset.\n", "\n", "Reference docs:\n", "* Altair [Chart API](https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html)\n", "* [topo_feature](https://altair-viz.github.io/user_guide/generated/api/altair.topo_feature.html#altair.topo_feature)\n", "* [Geoshape](https://altair-viz.github.io/user_guide/marks/geoshape.html)\n", "* [minimal choropleth example](https://altair-viz.github.io/gallery/choropleth.html)\n", "* [vega-datasets](https://github.com/vega/vega-datasets)" ] }, { "cell_type": "code", "execution_count": 3, "id": "6381c037-21b7-47d6-a41f-996c3ca8ffa4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n", "" ], "text/plain": [ "\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert counts to a list of records for Altair\n", "with open(COUNTY_ID_LOOKUP, newline='') as f:\n", " fips_to_name = {\n", " row['fips_code'].zfill(5): row['county_name']\n", " for row in csv.DictReader(f)\n", " if row['state_abbr'] == STATE_ABBREV\n", " }\n", "\n", "count_records = [\n", " {'fips': int(fips), 'count': count, 'name': fips_to_name.get(fips, fips)}\n", " for fips, count in counts.items()\n", "]\n", "counties_topo = alt.topo_feature(vega_data.us_10m.url, 'counties')\n", "state_fips_prefix = {fips[:2] for fips in counts}\n", "state_fips_int = int(next(iter(state_fips_prefix)))\n", "\n", "chart = (\n", " alt.Chart(counties_topo)\n", " .mark_geoshape()\n", " .transform_filter(f'floor(datum.id / 1000) == {state_fips_int}')\n", " .transform_lookup(\n", " lookup='id',\n", " from_=alt.LookupData(\n", " data=alt.InlineData(values=count_records),\n", " key='fips',\n", " fields=['count', 'name'],\n", " ),\n", " )\n", " .encode(\n", " color=alt.Color(\n", " 'count:Q',\n", " scale=alt.Scale(scheme='viridis'),\n", " legend=alt.Legend(title='Observations'),\n", " ),\n", " tooltip=[\n", " alt.Tooltip('name:N', title='County'),\n", " alt.Tooltip('count:Q', title='Observations'),\n", " ],\n", " )\n", " .project('albersUsa')\n", " .properties(\n", " title=f'{TAXON_NAME} observations by county — {STATE_ABBREV}',\n", " width=700,\n", " height=500,\n", " )\n", ")\n", "\n", "# Optionally export to HTML:\n", "# chart.save(f'{STATE_ABBREV}_counties.html')\n", "\n", "chart" ] }, { "cell_type": "markdown", "id": "34e2241e-9b01-4603-a2cd-af348fef8989", "metadata": {}, "source": [ "## State-level map\n", "Next, we will visualize observations per state within the US.\n", "\n", "Note: Using the same tools, you can map county-level data for the whole country, but since that requires 3000+ API calls, we'll just stick to state-level data for the purposes of this demo.\n", "\n", "Sample data is included that maps US state FIPS codes to iNaturalist place IDs. We'll start by loading that and then fetching observation counts per state:" ] }, { "cell_type": "code", "execution_count": 4, "id": "7b93f484-3f51-44e3-83d0-eea9b37760c3", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loading state counts from cache: buteo_jamaicensis_us_states.json\n" ] } ], "source": [ "def load_state_lookup() -> dict[int, int]:\n", " \"\"\"Return {state_fips: inat_place_id} for the 48 contiguous US states.\"\"\"\n", " with open(STATE_ID_LOOKUP, newline='') as f:\n", " return {\n", " int(row['state_fips']): int(row['inat_place_id'])\n", " for row in csv.DictReader(f)\n", " if int(row['state_fips']) not in EXCLUDED_STATE_FIPS\n", " }\n", "\n", "\n", "def fetch_state_counts(state_map: dict[int, int]) -> dict[int, int]:\n", " \"\"\"Fetch observation counts per state, returning {state_fips: count}.\"\"\"\n", " counts: dict[int, int] = {}\n", " total = len(state_map)\n", " for i, (fips, place_id) in enumerate(state_map.items(), start=1):\n", " print(f' State {i}/{total} (FIPS {fips}, place_id {place_id})')\n", " count = client.observations.search(\n", " taxon_name=TAXON_NAME,\n", " place_id=place_id,\n", " verifiable=True,\n", " ).count()\n", " counts[fips] = count\n", " return counts\n", "\n", "\n", "def load_or_fetch_state_counts() -> dict[int, int]:\n", " \"\"\"Load state counts from cache if available, otherwise fetch from API.\"\"\"\n", " if STATE_CACHE_FILE.exists():\n", " print(f'Loading state counts from cache: {STATE_CACHE_FILE}')\n", " with open(STATE_CACHE_FILE) as f:\n", " # JSON keys are strings; convert back to int\n", " return {int(k): v for k, v in json.load(f).items()}\n", "\n", " print('Fetching state-level observation counts for contiguous US...')\n", " state_map = load_state_lookup()\n", " counts = fetch_state_counts(state_map)\n", "\n", " with open(STATE_CACHE_FILE, 'w') as f:\n", " json.dump(counts, f)\n", " print(f'Cached to {STATE_CACHE_FILE}')\n", " return counts\n", "\n", "\n", "counts = load_or_fetch_state_counts()" ] }, { "cell_type": "code", "execution_count": 5, "id": "1332ebb3-96d0-43ef-b160-413fc7fa4848", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n", "" ], "text/plain": [ "\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open(STATE_POPULATION_LOOKUP, newline='') as f:\n", " fips_to_state_name = {int(row['state_fips']): row['state_name'] for row in csv.DictReader(f)}\n", "\n", "count_records = [\n", " {'fips': fips, 'count': count, 'name': fips_to_state_name.get(fips, str(fips))}\n", " for fips, count in counts.items()\n", "]\n", "states_topo = alt.topo_feature(vega_data.us_10m.url, 'states')\n", "\n", "chart = (\n", " alt.Chart(states_topo)\n", " .mark_geoshape()\n", " .transform_lookup(\n", " lookup='id',\n", " from_=alt.LookupData(\n", " data=alt.InlineData(values=count_records),\n", " key='fips',\n", " fields=['count', 'name'],\n", " ),\n", " )\n", " .encode(\n", " color=alt.Color(\n", " 'count:Q',\n", " scale=alt.Scale(scheme='viridis'),\n", " legend=alt.Legend(title='Observations'),\n", " ),\n", " tooltip=[\n", " alt.Tooltip('name:N', title='State'),\n", " alt.Tooltip('count:Q', title='Observations'),\n", " ],\n", " )\n", " .project('albersUsa')\n", " .properties(\n", " title=f'{TAXON_NAME} observations by state — contiguous US',\n", " width=900,\n", " height=500,\n", " )\n", ")\n", "\n", "\n", "# Optionally export to HTML:\n", "# chart.save('us_states.html')\n", "\n", "chart" ] }, { "cell_type": "markdown", "id": "9ce40986-3b3a-41a4-ba9c-c44df69e4119", "metadata": {}, "source": [ "## Normalized state-level map\n", "Like most observation maps, the results are heavily correlated with observer density. Let's normalize by state population to compensate for that.\n", "\n", "Source: [2020 US Census apportionment data](https://www.census.gov/data/tables/2020/dec/2020-apportionment-data.html)" ] }, { "cell_type": "code", "execution_count": 6, "id": "f026f2f2-cf03-466a-ae91-c71ba2199a47", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "\n",
       "
\n", "" ], "text/plain": [ "\u001b[1;35malt.Chart\u001b[0m\u001b[1m(\u001b[0m\u001b[33m...\u001b[0m\u001b[1m)\u001b[0m" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# {state_fips: population} and {state_fips: state_name} for all 50 states\n", "with open(STATE_POPULATION_LOOKUP, newline='') as f:\n", " pop_data = list(csv.DictReader(f))\n", " populations = {int(row['state_fips']): int(row['population']) for row in pop_data}\n", " fips_to_state_name = {int(row['state_fips']): row['state_name'] for row in pop_data}\n", "\n", "count_records = [\n", " {\n", " 'fips': fips,\n", " 'per_100k': round(count / populations[fips] * 100_000, 2),\n", " 'name': fips_to_state_name.get(fips, str(fips)),\n", " }\n", " for fips, count in counts.items()\n", " if fips in populations\n", "]\n", "\n", "states_topo = alt.topo_feature(vega_data.us_10m.url, 'states')\n", "\n", "chart = (\n", " alt.Chart(states_topo)\n", " .mark_geoshape()\n", " .transform_lookup(\n", " lookup='id',\n", " from_=alt.LookupData(\n", " data=alt.InlineData(values=count_records),\n", " key='fips',\n", " fields=['per_100k', 'name'],\n", " ),\n", " )\n", " .encode(\n", " color=alt.Color(\n", " 'per_100k:Q',\n", " scale=alt.Scale(scheme='viridis'),\n", " legend=alt.Legend(title='Obs. per 100k residents'),\n", " ),\n", " tooltip=[\n", " alt.Tooltip('name:N', title='State'),\n", " alt.Tooltip('per_100k:Q', title='Obs. per 100k residents'),\n", " ],\n", " )\n", " .project('albersUsa')\n", " .properties(\n", " title=f'{TAXON_NAME} observations per 100k residents by state',\n", " width=900,\n", " height=500,\n", " )\n", ")\n", "\n", "chart" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.12" } }, "nbformat": 4, "nbformat_minor": 5 }